import requests
import pymongo
from bs4 import BeautifulSoup

# HTTP request headers used for page fetches against esf.sz.fang.com.
# NOTE(review): the cookie values are hard-coded; presumably captured from a
# browser session, so they may be stale -- confirm they are still needed.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    # One entry per cookie pair; joined back into a single header value.
    'Cookie': '; '.join((
        'global_cookie=tie6dfq0icrtrw5dr7ybqarkv20jipjb9gq',
        '__utmc=147393320',
        '__utmz=147393320.1529645293.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
        '__utma=147393320.950755443.1529645293.1529645293.1529649154.2',
        '__utmt_t0=1',
        '__utmt_t1=1',
        '__utmt_t2=1',
        '__utmb=147393320.3.10.1529649154',
        'unique_cookie=U_tie6dfq0icrtrw5dr7ybqarkv20jipjb9gq*6',
    )),
    'Host': 'esf.sz.fang.com',
    'Referer': 'http://esf.sz.fang.com/housing/__0_0_0_0_1_0_0_0/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/67.0.3396.87 Safari/537.36'
    ),
}

# MongoDB settings: the host handed to MongoClient, then the database name
# and the collection name looked up on it.
MONGO_URL='localhost'
MONGO_DB='House'
MONGO_TABLE='info'
# Module-level handles created at import time; `myset` is the collection that
# save_to_mongo() writes to.
client = pymongo.MongoClient(MONGO_URL)
db = client[MONGO_DB]
myset = db[MONGO_TABLE]

def get_one_page(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with the module-level ``headers``. The HTTP status
    code is not inspected, so an error page is returned like any other body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` raises
            ``requests.exceptions.Timeout``. Without a timeout a stalled
            connection would block the crawl indefinitely.

    Returns:
        The response body decoded by ``requests`` (``response.text``).

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    return response.text


def save_to_mongo(house):
    """Insert one house record into the ``myset`` collection and report it.

    Args:
        house: Dict describing a single listing. PyMongo adds an ``_id`` key
            to it as part of the insert.

    Errors raised by the driver (for example when the server is unreachable)
    are not caught here and propagate to the caller.
    """
    # Collection.insert() was deprecated in PyMongo 3.0 and removed in 4.0;
    # insert_one() is the supported call for a single document.
    result = myset.insert_one(house)
    if result.inserted_id is not None:
        print('插入成功', house)
    else:
        print('插入失败', house)


def get_house_info(html):
    """Parse one listing page and save every house record found on it.

    Each field is collected page-wide with a CSS selector and the resulting
    columns are combined position by position into one record per listing,
    which is then passed to ``save_to_mongo``.

    If the columns have different lengths, only as many records as the
    shortest column are produced; unmatched trailing entries are dropped
    instead of raising ``IndexError``.
    NOTE(review): the columns are matched purely by position, so a listing
    that lacks one field would shift later values -- confirm against the
    page markup.

    Args:
        html: HTML text of a listing page.
    """
    soup = BeautifulSoup(html, 'lxml')

    names = soup.select('.houseList .list .plotListwrap .plotTit')
    kinds = soup.select('.houseList .list .plotListwrap .plotFangType')
    # Every third <p> starting from the second one.
    addrs = soup.select('.houseList .list .plotListwrap dd p')[1::3]

    # The <li> items are consumed in groups of three: selling, selled, year.
    # Select them once and slice, rather than re-running the selector.
    stats = soup.select('.houseList .list .plotListwrap .sellOrRenthy li')
    selling = stats[::3]
    selled = stats[1::3]
    years = stats[2::3]

    prices = soup.select('.houseList .list .listRiconwrap .priceAverage')
    ratios = soup.select('.houseList .list .listRiconwrap .ratio')

    keys = ('name', 'type', 'addr', 'selling', 'selled', 'year', 'price', 'ratio')
    columns = (names, kinds, addrs, selling, selled, years, prices, ratios)

    for row in zip(*columns):
        house = {key: tag.text.strip() for key, tag in zip(keys, row)}
        save_to_mongo(house)

def main():
    """Fetch listing pages 1 through 100 and hand each one to the parser."""
    url_pattern = 'http://esf.sz.fang.com/housing/__0_0_0_0_%d_0_0_0/'
    page_no = 1
    while page_no <= 100:
        # Download the page, then parse and store its records.
        get_house_info(get_one_page(url_pattern % page_no))
        page_no += 1


# Start the crawl only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()